#Install and load relevant packages
install.packages(c("mixlm","CBPS","mgcv","drtmle")) 
library(mixlm)
library(CBPS)
library(mgcv)
library(drtmle)

#Configure the sample size n, your beta value, and the number of repetitions, r.
n=300
beta <- 0.4
r <- 50

################## INCORRECT OUTCOME MODEL, CORRECT PROPENSITY MODEL #################

#Initialize.
ATE_true <- rep(NA,r)
ATE_glm <- rep(NA,r)
ATE_cbps <- rep(NA,r)
ATE_ocbps <- rep(NA,r)
ATE_gam <- rep(NA,r)
ATE_dr <- rep(NA,r)

bias_true <- rep(NA,r)
bias_glm <- rep(NA,r)
bias_cbps <- rep(NA,r)
bias_ocbps <- rep(NA,r)
bias_gam <- rep(NA,r)
bias_dr <- rep(NA,r)

lower_true <- rep(NA,r)
upper_true <- rep(NA,r)
coverage_true <- rep(NA,r)

lower_glm <- rep(NA,r)
upper_glm <- rep(NA,r)
coverage_glm <- rep(NA,r)

lower_gam <- rep(NA,r)
upper_gam <- rep(NA,r)
coverage_gam <- rep(NA,r)

#drtmle package provides a function for confidence intervals, so we don't need to initialize the CI vectors.
coverage_dr <- rep(NA,r)

lower_cbps <- rep(NA,r)
upper_cbps <- rep(NA,r)
coverage_cbps <- rep(NA,r)

lower_ocbps <- rep(NA,r)
upper_ocbps <- rep(NA,r)
coverage_ocbps <- rep(NA,r)

#Generate seeds to use in each iteration for reproducible results.
set.seed(123456)

for(j in 1:r){
  
  #Initialize the X matrix.
  
  #Generate the covariates according to the simulation settings in the paper.
  X_1 <- rnorm(n,3,sqrt(2))
  X_2 <- rnorm(n,0,1)
  X_3 <- rnorm(n,0,1)
  X_4 <- rnorm(n,0,1)
  
  X_mat <- as.matrix(cbind(rep(1,n), X_1, X_2, X_3, X_4))
  
  #Create a transformed version of the covariates to be used in the incorrect outcome model.
  X_mat_quad <- as.matrix(cbind(rep(1,n), (X_1)^2, (X_2)^2, (X_3)^2, (X_4)^2))
  
  #Initialize the Y_1 and Y_0 vector according to the simulation settings in the paper.
  Y_1 <- X_mat_quad %*% matrix(c(200, 27.4, 13.7, 13.7, 13.7), 5, 1) + rnorm(n)
  Y_0 <- X_mat_quad %*% matrix(c(200, 0 , 13.7, 13.7, 13.7), 5, 1) + rnorm(n)
  
  #True Propensity Score calculation.
  pre_prop <- X_mat[,2:5] %*% matrix(c(-beta, 0.5, -0.25, -0.1), 4, 1)
  propensity_true <- (exp(pre_prop))/(1+(exp(pre_prop)))
  
  #Generate T_vec, the binary vector of treatment assignments, with the true propensity scores.
  T_vec <- rbinom(n, size=1, prob=propensity_true)
  
  #Now generate the actual outcome, Y_outcome.
  Y_outcome <- Y_1*T_vec + Y_0*(1-T_vec)
  
  #First, fit the linear regression model for T=1 and T=0.
  Y_outcome_1 <- Y_outcome[which(T_vec==1)]
  Y_outcome_0 <- Y_outcome[which(T_vec==0)]
  
  X_mat_1 <- X_mat[which(T_vec==1),c(2,3,4,5)]
  X_mat_0 <- X_mat[which(T_vec==0),c(2,3,4,5)]
  
  lin_1 <- lm(Y_outcome_1 ~ X_mat_1) 
  lin_0 <- lm(Y_outcome_0 ~ X_mat_0)
  
  sigma_hat_1 <- summary(lin_1)$s #residual standard error for T=1
  sigma_hat_0 <- summary(lin_0)$s #residual standard error for T=0
  
  fitted_1 <- X_mat%*%as.matrix(lin_1$coefficients,5,1)
  fitted_0 <- X_mat%*%as.matrix(lin_0$coefficients,5,1)
  
  L_hat <- fitted_1 - fitted_0
  K_hat <- fitted_0
  
  # #Add the quadratic regression
  # X_mat_quad_1 <- X_mat_quad[which(T_vec==1),c(2,3,4,5)]
  # X_mat_quad_0 <- X_mat_quad[which(T_vec==0),c(2,3,4,5)]
  #
  # quad_1 <- lm(Y_outcome_1 ~ X_mat_quad_1)
  # quad_0 <- lm(Y_outcome_0 ~ X_mat_quad_0)
  #
  # quad_fitted_1 <- X_mat_quad%*%as.matrix(quad_1$coefficients,5,1)
  # quad_fitted_0 <- X_mat_quad%*%as.matrix(quad_0$coefficients,5,1)
  #
  # L_quad_hat <- quad_fitted_1 - quad_fitted_0
  # K_quad_hat <- quad_fitted_0
  #
  # #Compare the two approaches.Using quadratic regression is MUCH better in terms of estimation error.
  # sum((Y_outcome_1 - quad_fitted_1[which(T_vec==1)])^2)
  # sum((Y_outcome_0 - quad_fitted_0[which(T_vec==0)])^2)
  #
  # sum((Y_outcome_1 - fitted_1[which(T_vec==1)])^2)
  # sum((Y_outcome_0 - fitted_0[which(T_vec==0)])^2)
  #
  # #How different are L_hat and K_hat from L_quad_hat and K_quad_hat? VERY different. 
  # L_hat - L_quad_hat
  # K_hat - K_quad_hat
  
  
  ####### TRUE IPTW #######
  #Calculating the ATE.
  ATE_true[j] <- mean((T_vec*Y_outcome/propensity_true) - (((1-T_vec)*Y_outcome)/(1-propensity_true)))
  
  #Calculate the bias:
  bias_true[j] <- ATE_true[j] - 301.4 #mu=E(L(X))=27.4*E(X_1^2)=27.4*11=301.4
  
  asy_var_true_hat <- mean((fitted_1)^2/propensity_true + (fitted_0)^2/(1-propensity_true) - (rep(mean(fitted_1 - fitted_0),n))^2)
  
  #Calculate the 95% CI.
  diff_true <- 1.96*sqrt(asy_var_true_hat/n)
  lower_true[j] <- ATE_true[j] - diff_true
  upper_true[j] <- ATE_true[j] + diff_true
  
  #Check if the true value of mu=301.4 is within this CI.
  if(301.4 >= lower_true[j] & 301.4 <= upper_true[j]){
    coverage_true[j] <- 1
  }else{
    coverage_true[j] <- 0
  }
  
  
  ####### GLM IPTW #######
  #Calculating the ATE.
  glm.fit <- glm(T_vec ~ X_1 + X_2 + X_3 + X_4, family="binomial", REML=FALSE)
  pre_prop_glm <- X_mat[,1:5] %*% matrix(coefficients(glm.fit), 5, 1)
  propensity_glm <- (exp(pre_prop_glm))/(1+(exp(pre_prop_glm)))
  ATE_glm[j] <- mean((T_vec*Y_outcome/propensity_glm) - (((1-T_vec)*Y_outcome)/(1-propensity_glm)))
  
  #Calculate the bias:
  bias_glm[j] <- ATE_glm[j] - 301.4
  
  #95% CI 
  #Fisher Information Matrix
  I <- array(rep(NA,5*5*n),dim=c(5,5,n))
  
  for(i in 1:n){
    exp_term <- exp(t(coefficients(glm.fit))%*%matrix(X_mat[i,],5,1))
    I[,,i] <- -matrix(rep(exp_term/((1+exp_term)^2),5*5),5,5)*(matrix(X_mat[i,],5,1)%*%matrix(X_mat[i,],1,5))
  }
  
  I_mat <- -apply(I,c(1,2),mean)
  
  #H_0_hat_glm
  prop_modified_glm <- matrix(propensity_glm,1,n)/(1+exp(matrix(coefficients(glm.fit),1,5)%*%t(X_mat)))
  der_mat_glm <- matrix(rep(prop_modified_glm,each=5),5,n)
  derivative_glm <- matrix(rep(NA,5*n),5,n)
  for(i in 1:n){
    derivative_glm[,i] <- matrix(der_mat_glm[,i]*X_mat[i,],5,1) 
  }
  
  temp_sum <- matrix(rep(0,5),5,1)
  for(i in 1:n){
    temp_sum <- temp_sum + derivative_glm[,i]*(K_hat[i] + (1-propensity_glm[i])*L_hat[i])/(propensity_glm[i]*(1-propensity_glm[i]))
  }
  H_0_hat_glm <- -temp_sum/n
  
  #Sigma_mu_glm
  Sigma_mu_glm <- mean((fitted_1)^2/propensity_glm + (fitted_0)^2/(1-propensity_glm)) - (ATE_glm[j])^2
  
  #Asymptotic Variance
  asy_var_glm_hat <- Sigma_mu_glm - t(H_0_hat_glm)%*%solve(I_mat)%*%H_0_hat_glm
  
  #Calculate the 95% CI.
  diff_glm <- 1.96*sqrt(asy_var_glm_hat/n)
  lower_glm[j] <- ATE_glm[j] - diff_glm
  upper_glm[j] <- ATE_glm[j] + diff_glm
  
  #Check if the true value of mu=301.4 is within this CI.
  if(301.4 >= lower_glm[j] & 301.4 <= upper_glm[j]){
    coverage_glm[j] <- 1
  }else{
    coverage_glm[j] <- 0
  }
  
  
  ####### CBPS Method #######
  #Calculating the ATE.
  cbps.fit <- CBPS(T_vec ~ X_mat, ATT=0, method="exact")
  propensity_cbps <- fitted.values(cbps.fit)
  ATE_cbps[j] <- mean((T_vec*Y_outcome/propensity_cbps) - (((1-T_vec)*Y_outcome)/(1-propensity_cbps)))
  
  #Calculate the bias:
  bias_cbps[j] <- ATE_cbps[j] - 301.4
  
  #Calculate the 95% CI
  #omega_hat (Look at the Var(g_beta) formula on page 7 in the paper! 
  #Because this is omega_hat according to page 35 right under (A.2).)
  new_X_2 <- array(rep(NA,5*5*n),dim=c(5,5,n))
  for(i in 1:n){
    new_X_2[,,i] <- matrix(X_mat[i,],5,1)%*%t(X_mat[i,])/(propensity_cbps[i]*(1-propensity_cbps[i]))
  }
  omega_hat <- apply(new_X_2,c(1,2),mean)
  
  #Sigma_mu_hat
  Sigma_mu_hat <- mean(((fitted_1)^2/propensity_cbps) + ((fitted_0)^2/(1-propensity_cbps))) - (ATE_cbps[j])^2
  
  #Cov(mu_beta, g_beta)
  L_hat <- fitted_1 - fitted_0
  K_hat <- fitted_0
  
  new_X_3 <- matrix(rep(NA,n*5),n,5)
  for(i in 1:n){
    new_X_3[i,] <- X_mat[i,]*(K_hat[i] + (1-propensity_cbps[i])*L_hat[i])/(propensity_cbps[i]*(1-propensity_cbps[i]))
  }
  
  cov_hat <- apply(new_X_3,2,mean)
  
  #H_0_hat
  prop_modified <- propensity_cbps/(1+exp(matrix(cbps.fit$coefficients,1,5)%*%t(X_mat)))
  der_mat <- matrix(rep(prop_modified,each=5),5,n)
  derivative <- matrix(rep(NA,5*n),5,n)
  for(i in 1:n){
    derivative[,i] <- matrix(der_mat[,i]*X_mat[i,],5,1) 
  }
  
  temp_sum <- matrix(rep(0,5),5,1)
  for(i in 1:n){
    temp_sum <- temp_sum + derivative[,i]*(K_hat[i] + (1-propensity_cbps[i])*L_hat[i])/(propensity_cbps[i]*(1-propensity_cbps[i]))
  }
  H_0_hat <- -temp_sum/n
  
  #H_f_hat
  temp_sum_2 <- matrix(rep(0,5*5),5,5)
  for(i in 1:n){
    temp_sum_2 <- temp_sum_2 + ( matrix(X_mat[i,],5,1)%*%matrix(derivative[,i],1,5)/(propensity_cbps[i]*(1-propensity_cbps[i]))  )
  }
  H_f_hat <- -temp_sum_2/n
  
  #Put it all together
  asy_var_cbps_hat <- Sigma_mu_hat + t(H_0_hat)%*%solve(t(H_f_hat)%*%solve(omega_hat)%*%H_f_hat)%*%H_0_hat -
    2*t(H_0_hat)%*%solve(H_f_hat)%*%cov_hat
  
  #Calculate the 95% CI.
  diff_cbps <- 1.96*sqrt(asy_var_cbps_hat/n)
  lower_cbps[j] <- ATE_cbps[j] - diff_cbps
  upper_cbps[j] <- ATE_cbps[j] + diff_cbps
  
  #Check if the true value of mu=301.4 is within this CI.
  if(301.4 >= lower_cbps[j] & 301.4 <= upper_cbps[j]){
    coverage_cbps[j] <- 1
  }else{
    coverage_cbps[j] <- 0
  }
  
  
  ####### oCBPS Method #######
  #Calculating the ATE.
  ocbps.fit <- CBPS(T_vec ~ X_mat, ATT=0, baseline.formula = ~X_mat[,c(1,3:5)], diff.formula = ~X_mat[,2])
  beta_hat_ocbps <- matrix(ocbps.fit$coefficients,5,1)
  propensity_ocbps <- fitted.values(ocbps.fit)
  ATE_ocbps[j] <- mean((T_vec*Y_outcome/propensity_ocbps) - (((1-T_vec)*Y_outcome)/(1-propensity_ocbps)))
  
  #Calculate the bias:
  bias_ocbps[j] <- ATE_ocbps[j] - 301.4

  ####### Calculate the 95% CI.
  #Use the oCBPS asymptotic variance expression for when both models are correctly specified. (Otherwise we get unstable results.)
  asy_var_ocbps_hat <- mean((sigma_hat_1^2)/propensity_ocbps + (sigma_hat_0^2)/(1-propensity_ocbps) + (L_hat - rep(ATE_ocbps[j],n))^2)
  
  diff_ocbps <- 1.96*sqrt(asy_var_ocbps_hat/n)
  lower_ocbps[j] <- ATE_ocbps[j] - diff_ocbps
  upper_ocbps[j] <- ATE_ocbps[j] + diff_ocbps
  
  #Check if the true value of mu=301.4 is within this CI.
  if(301.4 >= lower_ocbps[j] & 301.4 <= upper_ocbps[j]){
    coverage_ocbps[j] <- 1
  }else{
    coverage_ocbps[j] <- 0
  }
  
  ####### GAM IPTW #######
  #Calculating the ATE.
  gam.fit <- gam(T_vec ~ s(X_1) + s(X_2) + s(X_3) + s(X_4), family="binomial", method="REML")
  pre_prop_gam <- as.matrix(predict(gam.fit))
  propensity_gam <- (exp(pre_prop_gam))/(1+(exp(pre_prop_gam)))
  ATE_gam[j] <- mean((T_vec*Y_outcome/propensity_gam) - (((1-T_vec)*Y_outcome)/(1-propensity_gam)))
  if(is.nan(ATE_gam[j])==TRUE){
    break
  }
  
  #Calculate the bias:
  bias_gam[j] <- ATE_gam[j] - 301.4
  
  asy_var_gam_hat <- mean((sigma_hat_1^2)/propensity_gam + (sigma_hat_0^2)/(1-propensity_gam) + (L_hat - rep(ATE_gam[j],n))^2)
  
  #Calculate the 95% CI.
  diff_gam <- 1.96*sqrt(asy_var_gam_hat/n)
  lower_gam[j] <- ATE_gam[j] - diff_gam
  upper_gam[j] <- ATE_gam[j] + diff_gam
  
  #Check if the true value of mu=301.4 is within this CI.
  if(301.4 >= lower_gam[j] & 301.4 <= upper_gam[j]){
    coverage_gam[j] <- 1
  }else{
    coverage_gam[j] <- 0
  }
  
  
  ####### DR #######
  #Calculating the ATE.
  npreg_fit <- drtmle(W = as.data.frame(cbind(X_1,X_2,X_3,X_4)),
                      A = T_vec,
                      a_0 = c(0,1),
                      Y = Y_outcome, family = binomial(),
                      SL_g = "SL.npreg", 
                      SL_Q = "SL.npreg",
                      SL_gr = "SL.npreg", 
                      SL_Qr = "SL.npreg",
                      stratify = TRUE)
  
  ATE_dr[j] <- npreg_fit$drtmle$est[2] - npreg_fit$drtmle$est[1]
  
  #Calculate the bias:
  bias_dr[j] <- ATE_dr[j] - 301.4
  
  #Calculate the 95% CI.
  ci_dr <- ci(npreg_fit,contrast=c(-1,1)) 
  
  #Check if the true value of mu=301.4 is within this CI.
  if(301.4 >= ci_dr$drtmle[1,2] & 301.4 <= ci_dr$drtmle[1,3]){
    coverage_dr[j] <- 1
  }else{
    coverage_dr[j] <- 0
  }
  
  
  print(j)
}

###### RESULTS ######

#True IPTW results
Bias_true <- mean(bias_true)
SD_true <- sd(ATE_true)
RMSE_true <- sqrt((sd(ATE_true))^2 + (mean(bias_true))^2)
coverage_prob_true <- mean(coverage_true)

#GLM IPTW results
Bias_glm <- mean(bias_glm)
SD_glm <- sd(ATE_glm)
RMSE_glm <- sqrt((sd(ATE_glm))^2 + (mean(bias_glm))^2)
coverage_prob_glm <- mean(coverage_glm)

#CBPS results
Bias_cbps <- mean(bias_cbps)
SD_cbps <- sd(ATE_cbps)
RMSE_cbps <- sqrt((sd(ATE_cbps))^2 + (mean(bias_cbps))^2)
coverage_prob_cbps <- mean(coverage_cbps)

#oCBPS results
Bias_ocbps <- mean(bias_ocbps)
SD_ocbps <- sd(ATE_ocbps)
RMSE_ocbps <- sqrt((sd(ATE_ocbps))^2 + (mean(bias_ocbps))^2)
coverage_prob_ocbps <- mean(coverage_ocbps)

#GAM results
Bias_gam <- mean(bias_gam)
SD_gam <- sd(ATE_gam)
RMSE_gam <- sqrt((sd(ATE_gam))^2 + (mean(bias_gam))^2)
coverage_prob_gam <- mean(coverage_gam)

#DR results
Bias_dr <- mean(bias_dr)
SD_dr <- sd(ATE_dr)
RMSE_dr <- sqrt((sd(ATE_dr))^2 + (mean(bias_dr))^2)
coverage_prob_dr <- mean(coverage_dr)


temp <- matrix(c(round(c(Bias_true, SD_true, RMSE_true),2), 
                 round(c(Bias_glm, SD_glm, RMSE_glm),2),
                 round(c(Bias_gam, SD_gam, RMSE_gam),2),
                 round(c(Bias_dr, SD_dr, RMSE_dr),2),
                 round(c(Bias_cbps, SD_cbps, RMSE_cbps),2),
                 round(c(Bias_ocbps, SD_ocbps, RMSE_ocbps),2)),
               nrow=6,ncol=3,byrow=T)

coverage <- c(coverage_prob_true, coverage_prob_glm, coverage_prob_gam, coverage_prob_dr, coverage_prob_cbps, coverage_prob_ocbps)
results_untruetrue <- matrix(temp,18,1)
results_untruetrue <- c(results_untruetrue,coverage)

#The results to go into the chart.
UT_results <- matrix(results_untruetrue,24,1)

rownames(UT_results)= c("BIAS True", "BIAS GLM", "BIAS GAM", "BIAS DR", "BIAS CBPS", "BIAS oCBPS", 
                        "SD True", "SD GLM", "SD GAM", "SD DR", "SD CBPS", "SD oCBPS",
                        "RMSE True", "RMSE GLM", "RMSE GAM", "RMSE DR", "RMSE CBPS", "RMSE oCBPS",
                        "Coverage Prob. True", "Coverage Prob. GLM", "Coverage Prob. GAM", "Coverage Prob. DR",
                        "Coverage Prob. CBPS", "Coverage Prob. oCBPS")

colnames(UT_results) = "Incorrect Outcome Model, Correct Propensity Score Model"

#Final results in table form with relevant labels
UT_results
